In [1]:
import numpy as np
import pandas as pd
import scipy.io as sio
import scipy.stats
import pywt
from collections import defaultdict, Counter
from sklearn.ensemble import GradientBoostingClassifier
In [2]:
def calculate_entropy(list_values):
    # Shannon entropy of the empirical value distribution
    counter_values = Counter(list_values).most_common()
    probabilities = [elem[1] / len(list_values) for elem in counter_values]
    entropy = scipy.stats.entropy(probabilities)
    return entropy

def calculate_statistics(list_values):
    n5 = np.nanpercentile(list_values, 5)
    n25 = np.nanpercentile(list_values, 25)
    n75 = np.nanpercentile(list_values, 75)
    n95 = np.nanpercentile(list_values, 95)
    median = np.nanpercentile(list_values, 50)
    mean = np.nanmean(list_values)
    std = np.nanstd(list_values)
    var = np.nanvar(list_values)
    # Root mean square: square first, average, then take the root
    rms = np.sqrt(np.nanmean(np.square(list_values)))
    return [n5, n25, n75, n95, median, mean, std, var, rms]

def calculate_crossings(list_values):
    # Count how often the signal crosses zero and its own mean
    zero_crossing_indices = np.nonzero(np.diff(np.array(list_values) > 0))[0]
    no_zero_crossings = len(zero_crossing_indices)
    mean_crossing_indices = np.nonzero(np.diff(np.array(list_values) > np.nanmean(list_values)))[0]
    no_mean_crossings = len(mean_crossing_indices)
    return [no_zero_crossings, no_mean_crossings]

def get_features(list_values):
    # 12 features per coefficient array: entropy, 2 crossing counts,
    # and 9 summary statistics
    entropy = calculate_entropy(list_values)
    crossings = calculate_crossings(list_values)
    statistics = calculate_statistics(list_values)
    return [entropy] + crossings + statistics
def get_uci_har_features(dataset, labels, waveletname):
    # Extract wavelet features for every component of every signal
    # in the (tri-axial) UCI HAR dataset
    uci_har_features = []
    for signal_no in range(len(dataset)):
        features = []
        for signal_comp in range(dataset.shape[2]):
            signal = dataset[signal_no, :, signal_comp]
            list_coeff = pywt.wavedec(signal, waveletname)
            for coeff in list_coeff:
                features += get_features(coeff)
        uci_har_features.append(features)
    X = np.array(uci_har_features)
    Y = np.array(labels)
    return X, Y
def get_train_test(df, y_col, x_cols, ratio):
    """
    Splits a dataframe into a train and a test set. You need to specify:
    1. the train : test ratio (usually 0.7); because rows are assigned by
       a random draw, the actual split is only approximately this ratio
    2. the column holding the Y values
    """
    mask = np.random.rand(len(df)) < ratio
    df_train = df[mask]
    df_test = df[~mask]
    Y_train = df_train[y_col].values
    Y_test = df_test[y_col].values
    X_train = df_train[x_cols].values
    X_test = df_test[x_cols].values
    return df_train, df_test, X_train, Y_train, X_test, Y_test
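To get a feel for what these functions produce, here is a minimal sketch (not part of the original pipeline; the synthetic signal and the 'db4' wavelet are illustrative assumptions) that decomposes a noisy sine wave and counts the resulting features:
In [ ]:
# Illustrative sketch: feature extraction on a synthetic signal
rng = np.random.default_rng(0)
t = np.linspace(0, 1, 1024)
signal = np.sin(2 * np.pi * 5 * t) + 0.5 * rng.standard_normal(1024)

list_coeff = pywt.wavedec(signal, 'db4')   # multilevel DWT
features = []
for coeff in list_coeff:
    features += get_features(coeff)        # 12 features per level

print("{} coefficient arrays -> {} features".format(len(list_coeff), len(features)))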
Download the ECG dataset from here: https://github.com/mathworks/physionet_ECG_data/blob/master/ECGData.zip
In [3]:
filename = './data/ECG_data/ECGData.mat'
ecg_data = sio.loadmat(filename)
# The .mat file stores the signals and labels in a nested struct
ecg_signals = ecg_data['ECGData'][0][0][0]
ecg_labels_ = ecg_data['ECGData'][0][0][1]
ecg_labels = list(map(lambda x: x[0][0], ecg_labels_))

# Group the signals by their class label
dict_ecg_data = defaultdict(list)
for ii, label in enumerate(ecg_labels):
    dict_ecg_data[label].append(ecg_signals[ii])
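Before extracting features it can help to check what ended up in dict_ecg_data; this short inspection cell is an addition, not part of the original notebook:
In [ ]:
# Recordings per class and samples per recording
for label, signals in dict_ecg_data.items():
    print(label, len(signals), 'signals of length', len(signals[0]))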
In [4]:
list_labels = []
list_features = []
for k, v in dict_ecg_data.items():
    # Encode each class label as its index in the dict's key order
    yval = list(dict_ecg_data.keys()).index(k)
    for signal in v:
        features = []
        list_labels.append(yval)
        # Multilevel wavelet decomposition with the Symlet-5 wavelet
        list_coeff = pywt.wavedec(signal, 'sym5')
        for coeff in list_coeff:
            features += get_features(coeff)
        list_features.append(features)

df = pd.DataFrame(list_features)
ycol = 'y'
xcols = list(range(df.shape[1]))
df.loc[:, ycol] = list_labels

df_train, df_test, X_train, Y_train, X_test, Y_test = get_train_test(df, ycol, xcols, ratio=0.5)
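Since the split above is random, it is worth verifying that each class is represented on both sides; this check is an addition, not part of the original notebook:
In [ ]:
# Class counts on each side of the random split
print('train:', Counter(Y_train))
print('test: ', Counter(Y_test))
print('feature matrix shapes:', X_train.shape, X_test.shape)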
In [6]:
# Gradient boosting on the wavelet features; 10,000 boosting stages
# can take a while to train
cls = GradientBoostingClassifier(n_estimators=10000)
cls.fit(X_train, Y_train)
train_score = cls.score(X_train, Y_train)
test_score = cls.score(X_test, Y_test)
print("The Train Score is {}".format(train_score))
print("The Test Score is {}".format(test_score))
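For a per-class view of the test performance, a confusion matrix is more informative than the accuracy alone. This cell is an addition to the original notebook (sklearn.metrics.confusion_matrix is standard scikit-learn):
In [ ]:
from sklearn.metrics import confusion_matrix

# Rows are true classes, columns are predicted classes
Y_pred = cls.predict(X_test)
print(confusion_matrix(Y_test, Y_pred))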